Oktay Erten, Fabio Pereira, Clayton V. Deutsch
This notebook demonstrates the implementation of several sampling techniques, including Monte Carlo simulation (MCS), Latin hypercube sampling (LHS), Maximin Latin hypercube sampling (Maximin LHS), Latin hypercube sampling with multidimensional uniformity (LHSMDU), and the projection pursuit multivariate transform (PPMT), considering the case where the variables are linearly correlated.
import pandas as pd
import numpy as np
import pygeostat as gs
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab
from IPython.display import display, Math, Latex
from IPython.display import Image
import sys
import random
import warnings
import seaborn as sns
from skopt.space import Space
from scipy.stats import spearmanr, pearsonr
import math
from tqdm.notebook import tqdm_notebook
from matplotlib.pyplot import figure
from time import sleep
import time
import os
from scipy import stats
import scipy
import scipy.linalg
import pprint
from matplotlib import cm
from skopt.sampler import Lhs
from statsmodels.distributions.empirical_distribution import ECDF
from numpy import savetxt
from scipy.spatial.distance import pdist
from scipy.stats import gamma
from skopt.sampler import Sobol
from pyDOE import lhs
from __future__ import absolute_import, division, print_function, unicode_literals
from numpy.linalg import norm
from numpy import array, random, matrix, zeros, triu_indices, sum, sort, argsort, ravel, max, min as minimum, mean, argpartition
from scipy.stats import rv_continuous, rv_discrete
from scipy.stats.distributions import rv_frozen
from scipy.spatial.distance import pdist, squareform
import lhsmdu
from matplotlib import collections as mc
from matplotlib.ticker import FormatStrFormatter
from scipy.stats import truncnorm
from statsmodels.formula.api import ols
import matplotlib as mpl
import sympy as sym
from collections import OrderedDict
warnings.filterwarnings('ignore')
%autosave 60
%matplotlib inline
print (' ')
print ('The versions of the python packages are given as follows:')
print (' ')
print ('Python version :', sys.version_info)
print ('Numpy version :', np.__version__)
print ('Pandas version :', pd.__version__)
print ('Pygeostat version :', gs.__version__)
print ('Scipy version :', scipy.__version__)
print ('Matplotlib :', mpl.__version__)
pd.set_option ('display.max_columns', 700)
pd.set_option ('display.max_rows', 400)
pd.set_option ('display.min_rows', 10)
pd.set_option ('display.expand_frame_repr', True)
plt.rcParams ['figure.figsize'] = (5.0, 5.0)
#plt.style.use('seaborn-dark-palette')
plt.rcParams ['axes.grid'] = True
plt.rcParams ["patch.force_edgecolor"] = True
sns.set()
print ('----------------------------------------------------------- ')
print ('The working directory for this study is given as follows:')
print ('----------------------------------------------------------- ')
print (' ')
print (os.getcwd ())
os.chdir(r'C:\Users\erten\Desktop\github_ppmt_sampling')
print ('------------------------------------------------------------- ')
print ('The new working directory for this study is given as follows:')
print ('------------------------------------------------------------- ')
print (' ')
print (os.getcwd ())
# GSLIB programs' folder
exedir = 'exes/'
outdir = 'output/'
# Create the output directory.  The original line, `gs.mkdir = (outdir)`,
# rebound the `mkdir` attribute to the string instead of calling the
# function, so the directory was never created.
gs.mkdir (outdir)
CLHS and Maximin LHS sampling¶"""
This code was originally published by the following individuals for use with
Scilab:
Copyright (C) 2012 - 2013 - Michael Baudin
Copyright (C) 2012 - Maria Christopoulou
Copyright (C) 2010 - 2011 - INRIA - Michael Baudin
Copyright (C) 2009 - Yann Collette
Copyright (C) 2009 - CEA - Jean-Marc Martinez
website: forge.scilab.org/index.php/p/scidoe/sourcetree/master/macros
Much thanks goes to these individuals. It has been converted to Python by
Abraham Lee.
"""
'''Classic latin hypercube sampling'''
def _lhsclassic (n, samples):
# Generate the intervals
cut = np.linspace (0, 1, samples + 1)
# Fill points uniformly in each interval
u = np.random.rand (samples, n)
a = cut [:samples]
b = cut [1:samples + 1]
rdpoints = np.zeros_like(u)
for j in range(n):
rdpoints [:, j] = u [:, j] * (b - a) + a
# Make the random pairings
H = np.zeros_like (rdpoints)
for j in range(n):
order = np.random.permutation (range (samples))
H [:, j] = rdpoints [order, j]
return H
'''Calculate the pair-wise point distances of a matrix'''
def _pdist (x):
x = np.atleast_2d (x)
assert len (x.shape) == 2, 'Input array must be 2d-dimensional'
m, n = x.shape
if m < 2:
return []
d = []
for i in range (m - 1):
for j in range (i + 1, m):
d.append ((sum ((x [j, :] - x [i, :]) ** 2)) ** 0.5)
return np.array (d)
'''Maximin latin hypercube sampling'''
def _lhsmaximin (n, samples, iterations, lhstype):
    """Maximin LHS: draw `iterations` candidate designs and keep the one
    whose smallest pairwise point distance is largest.

    `lhstype == 'maximin'` draws candidates with _lhsclassic; any other
    value uses _lhscentered (presumably defined elsewhere in this file —
    not visible here).
    """
    best_min_dist = 0
    for _ in range (iterations):
        if lhstype == 'maximin':
            candidate = _lhsclassic (n, samples)
        else:
            candidate = _lhscentered (n, samples)
        min_dist = np.min (_pdist (candidate))
        # Keep the candidate that improves the worst-case separation.
        if best_min_dist < min_dist:
            best_min_dist = min_dist
            best = candidate.copy ()
    return best
def cov_mat (var = [], corr = []):
    '''Assemble a symmetric covariance matrix from `var` (the diagonal
    variances, in order) and `corr` (the upper-triangle covariances in
    row order: cov(v1,v2), cov(v1,v3), ..., cov(v2,v3), ...).  Prints
    the matrix and returns it; exits if the input counts are
    inconsistent.'''
    nvar = len (var)
    # A symmetric nvar x nvar matrix has nvar*(nvar+1)/2 free entries:
    # nvar variances plus the off-diagonal covariances.
    if (nvar * (nvar + 1)) / 2 != nvar + len (corr):
        print ('Please check the inputs again...')
        sys.exit ()
    else:
        mat = np.zeros ((nvar, nvar), dtype = float)
        np.fill_diagonal (mat, var)
        mat [np.triu_indices (nvar, 1)] = corr
        # Mirror the upper triangle into the lower one for symmetry.
        low = np.tril_indices (nvar, -1)
        mat [low] = mat.T [low]
        print (' ')
        print ('The covariance matrix for the given inputs is as follows:')
        print (' ')
        display (mat)
        return mat
def SeedVal (datafl, colname):
    """Prompt for a realization count N, then return the first N seed
    values read from column `colname` of the CSV file `datafl`.

    Parameters
    ----------
    datafl : str, path of the CSV file containing the seeds
    colname : str, name of the seed column

    Returns
    -------
    list of the first N values of that column.
    """
    x = int (input ('Enter the number of realizations needed: '))
    # Read the seed file ONCE; the original re-read the whole CSV on
    # every loop iteration.  (The per-iteration time.sleep(1e-10) was a
    # no-op and has been removed.)
    seeds = pd.read_csv (datafl)
    list_seed = []
    for i in tqdm_notebook (range (x), total = x, desc = 'Loop 1'):
        list_seed.append (seeds [colname][i])
    return list_seed
Example of the original oil in place (OOIP). OOIP can be calculated as follows:
$$\textrm{OOIP} = \textrm{CAT} \times \textrm{NTG} \times \phi_{net} (1-\textrm{S}_{\textrm{w}})$$where $\textrm{C}=1$ is the constant to account for units; $\textrm{A}$ is the deposit area; $\textrm{T}$ is the thickness of the deposit; $\textrm{NTG}$ is the net oil to gross volume; $\phi_{net}$ is the net porosity and $\textrm{S}_{\textrm{w}}$ is the water saturation. The parametric distributions and parameters used for $\textrm{OOIP}$ variables are given as follows:
t0 = time.perf_counter()
mpl.style.use('default')
fig, axes = plt.subplots(2, 3, figsize = (20, 8))
area = np.random.triangular (2, 4, 6, 10000000)
sns.distplot (area, bins = 40, ax = axes [0, 0])
axes[0, 0].set (xlabel = 'Area of deposit (A)', ylabel = 'Density')
axes[0, 0].set_title ('Triangular; \n a = 2, b = 4, c = 6')
axes [0, 0].grid (color = 'gray', linestyle = '--', linewidth = 0.5)
thick = np.random.normal(10, 1, 10000000)
sns.distplot (thick, bins = 40, ax = axes [0, 1])
axes [0, 1].set (xlabel = 'Thickness of deposit (T)', ylabel = 'Density')
axes[0, 1].set_title ('Gaussian; \n m = 10, \u03C3 = 1')
plt.xlim (-5, 25)
axes [0, 1].grid (color = 'gray', linestyle = '--', linewidth = 0.5)
net_oil = np.random.uniform (0.6, 0.8, 10000000)
sns.distplot (net_oil, bins = 40, ax = axes [0, 2])
axes[0, 2].set (xlabel = 'Net oil to gross volume (NTG)', ylabel = 'Density')
axes[0, 2].set_title ('Uniform; \n a = 0.6, b = 0.8')
axes[0, 2].grid (color = 'gray', linestyle = '--', linewidth = 0.5)
net_poros = np.random.triangular (0.15, 0.25, 0.35, 10000000)
sns.distplot (net_poros, bins = 40, ax = axes [1, 0])
axes[1, 0].set (xlabel = 'Net porosity (\u03A6'+'$_{net}$)', ylabel = 'Density')
axes[1, 0].set_title ('Triangular; \n a = 0.15, b = 0.25, c = 0.35')
axes[1, 0].grid (color = 'gray', linestyle = '--', linewidth = 0.5)
water_sat = np.random.triangular (0.15, 0.2, 0.3, 10000000)
sns.distplot (water_sat, bins = 40, ax = axes [1, 1])
axes[1, 1].set (xlabel = 'Water saturation '+ '($S_{w}$)', ylabel = 'Density')
axes[1, 1].set_title ('Triangular; \n a = 0.15, b = 0.2, c = 0.3')
axes[1, 1].grid (color = 'gray', linestyle = '--', linewidth = 0.5)
fig.delaxes (axes[1][2])
plt.subplots_adjust (left = 0.125, bottom = 0.1, right = 0.75, top = 0.9, wspace = 0.35, hspace = 0.45)
plt.savefig (outdir + 'figures/figure_1.png', bbox_inches = 'tight', dpi = 300)
plt.show ()
t1 = time.perf_counter() - t0
print (' ')
print ('Current processor time (in seconds):', t1)
constant = 1
ooip = constant * area * thick * net_oil * (net_poros * (1 - water_sat))
ooip
def ecdf(data):
    """Return the empirical CDF of a 1-D sample as a pair of arrays:
    the sorted values and their cumulative probabilities (1/n ... 1)."""
    size = len (data)
    sorted_vals = np.sort (data)
    probs = np.arange (1, size + 1) / size
    return sorted_vals, probs
varias = [area, thick, net_oil, net_poros, water_sat]
results = []
for i in tqdm_notebook (varias, total = len (varias), desc = 'Loop 1'):
kk = ecdf (i)
results.append (kk)
del kk
time.sleep (0.0000001)
mpl.style.use('default')
fig, axes = plt.subplots(2, 3, figsize = (20, 8))
axes [0, 0].plot (results [0][0], results [0][1], linestyle = '-', linewidth = 2)
axes[0, 0].set (xlabel = 'Area of deposit ($A$)', ylabel = 'F(A)')
axes[0, 0].set_title ('Triangular; \n a = 2, b = 4, c = 6')
plt.xlim (2, 6)
plt.ylim (-0.02, 1.02)
axes [0, 0].set_yticks (np.arange(0,1.1,0.1))
axes [0, 0].set_xticks (np.arange(2,7,1))
axes [0, 0].grid (color = 'gray', linestyle = '--', linewidth = 0.5)
axes [0, 1].plot (results [1][0], results [1][1], linestyle = '-', linewidth = 2)
axes [0, 1].set (xlabel = 'Thickness of deposit ($T$)', ylabel = 'F(T)')
axes[0, 1].set_title ('Gaussian; \n m = 10, \u03C3 = 1')
plt.xlim (4, 15)
plt.ylim (-0.02, 1.02)
axes [0, 1].set_yticks (np.arange(0,1.1,0.1))
axes [0, 1].set_xticks (np.arange(4,16,1))
axes [0, 1].grid (color = 'gray', linestyle = '--', linewidth = 0.5)
axes [0, 2].plot (results [2][0], results [2][1], linestyle = '-', linewidth = 2)
axes [0, 2].set (xlabel = 'Net oil to gross volume ($NTG$)', ylabel = 'F(NTG)')
axes[0, 2].set_title ('Uniform; \n a = 0.6, b = 0.8')
plt.xlim (0.6, 0.85)
plt.ylim (-0.02, 1.02)
axes [0, 2].set_yticks (np.arange(0,1.1,0.1))
axes [0, 2].set_xticks (np.arange(0.6,0.85,0.05))
axes [0, 2].grid (color = 'gray', linestyle = '--', linewidth = 0.5)
axes [1, 0].plot (results [3][0], results [3][1], linestyle = '-', linewidth = 2)
axes[1, 0].set (xlabel = 'Net porosity (\u03A6'+'$_{net}$)', ylabel = 'F(\u03A6'+'$_{net}$)')
axes[1, 0].set_title ('Triangular; \n a = 0.15, b = 0.25, c = 0.35')
plt.xlim (0.15, 0.4)
plt.ylim (-0.02, 1.02)
axes [1, 0].set_yticks (np.arange(0,1.1,0.1))
axes [1, 0].set_xticks (np.arange(0.15,0.4,0.05))
axes [1, 0].grid (color = 'gray', linestyle = '--', linewidth = 0.5)
axes [1, 1].plot (results [4][0], results [4][1], linestyle = '-', linewidth = 2)
axes[1, 1].set (xlabel = 'Water saturation '+ '($S_{w}$)', ylabel = 'F($S_{w}$)')
axes[1, 1].set_title ('Triangular; \n a = 0.15, b = 0.2, c = 0.3')
plt.xlim (0.15, 0.35)
plt.ylim (-0.02, 1.02)
axes [1, 1].set_yticks (np.arange(0,1.1,0.1))
axes [1, 1].set_xticks (np.arange(0.15,0.35,0.05))
axes [1, 1].grid (color = 'gray', linestyle = '--', linewidth = 0.5)
fig.delaxes (axes[1][2])
plt.subplots_adjust (left = 0.125, bottom = 0.1, right = 0.75, top = 0.9, wspace = 0.35, hspace = 0.45)
plt.savefig (outdir + 'figures/figure_2.png', bbox_inches = 'tight', dpi = 300)
plt.show ()
x_popul, y_popul = ecdf (ooip)
mpl.style.use('default')
fig, ax = plt.subplots(figsize = (4, 4.1))
ax.plot(x_popul, y_popul, linestyle = '-', linewidth = 2)
plt.xlabel('OOIP')
plt.ylabel('F(OOIP)')
plt.xlim (0, 16.05)
plt.ylim (0, 1)
ax.set_yticks (np.arange(0,1.1,0.1))
ax.set_xticks (np.arange(0,16.05,2))
ax.grid (color = 'gray', linestyle = '--', linewidth = 0.6)
#plt.margins(0.05) # keep data off plot edges
plt.subplots_adjust (left = 0.125, bottom = 0.1, right = 0.75, top = 0.9, wspace = 0.35, hspace = 0.45)
plt.savefig (outdir + 'figures/figure_3.png', bbox_inches = 'tight', dpi = 300)
plt.show ()
Monte Carlo simulation (MCS)¶100 different seed values have been used.
seeds = SeedVal (datafl = 'seed.csv', colname = 'seed_nums')
print ('The seed values for the specified number of realizations are given as follows: ', seeds)
# MCS sampling technique
def rand_sample (n, samples, nreals, seed = None):
    '''Draw uniform [0, 1) Monte Carlo samples.

    Parameters
    ----------
    n       : dimension (number of variables)
    samples : number of samples per realization
    nreals  : number of realizations
    seed    : None (unseeded), a single int seed, or a list of nreals
              int seeds (one per realization)

    Returns
    -------
    list of nreals arrays, each of shape (samples, n).'''
    u_list = []
    if seed is None:
        for _ in range (nreals):
            u_list.append (np.random.rand (samples, n))
    elif not isinstance (seed, list):
        # Single scalar seed: seed once, then draw every realization
        # from that stream.  (The original drew one array here and
        # never appended it, so a scalar seed returned an empty list.)
        np.random.seed (seed)
        for _ in range (nreals):
            u_list.append (np.random.rand (samples, n))
    else:
        # One seed per realization for reproducible independent draws.
        for j in range (nreals):
            np.random.seed (seed [j])
            u_list.append (np.random.rand (samples, n))
    return u_list
MCS¶list_of_reals = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 200, 300, 400, 500, 1000, 3000, 5000,
7000, 9000, 10000]
mcs_samples = []
for i in tqdm_notebook (list_of_reals, total = 20, desc = 'Loop 1'):
cc = rand_sample (n = 5, samples = i, nreals = 100, seed = seeds)
mcs_samples.append (cc)
del cc
time.sleep (0.000000000001)
dataframes = []
for i in tqdm_notebook (range (5), total = 5, desc = 'Loop 1'):
v = zip (results [i][0], results [i][1])
x = pd.DataFrame (v, columns = ['values', 'cdf'])
dataframes.append (x)
del x
time.sleep (0.00000001)
g = []
for i in range (20):
i = [[], [], [], [], []]
g.append (i)
for i in tqdm_notebook (range (20), total = 20, desc = 'Loop 2'):
for j in range (100):
for k in range (5):
f = np.quantile (dataframes [k] ['values'], mcs_samples [i][j].T [k])
g[i][k].append (f)
del f
time.sleep (0.00000001)
# CLHS) — CLHS sampling technique
def classic_lhs (n, samples, nreals, seed = None):
    '''Generate `nreals` classic Latin hypercube designs, each of shape
    (samples, n), via _lhsclassic.

    `seed` may be None (unseeded), a single int, or a list of nreals
    ints (one seed per realization).  Returns a list of designs.'''
    classic_samples = []
    if seed is None:
        for _ in range (nreals):
            classic_samples.append (_lhsclassic (n, samples))
    elif not isinstance (seed, list):
        # Single scalar seed: seed once, then draw every realization.
        # (The original drew one design here without appending it, so a
        # scalar seed returned an empty list.)
        np.random.seed (seed)
        for _ in range (nreals):
            classic_samples.append (_lhsclassic (n, samples))
    else:
        # One seed per realization.
        for j in range (nreals):
            np.random.seed (seed [j])
            classic_samples.append (_lhsclassic (n, samples))
    return classic_samples
CLHS¶list_of_reals = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 200, 300, 400, 500, 1000, 3000, 5000,
7000, 9000, 10000]
classic_lhs_samples = []
for i in tqdm_notebook (list_of_reals, total = 20, desc = 'Loop 1'):
vv = classic_lhs (n = 5, samples = i, nreals = 100, seed = seeds)
classic_lhs_samples.append (vv)
del vv
time.sleep (0.000000000001)
dataframes = []
for i in tqdm_notebook (range (5), total = 5, desc = 'Loop 1'):
v = zip (results [i][0], results [i][1])
x = pd.DataFrame (v, columns = ['values', 'cdf'])
dataframes.append (x)
del x
del v
time.sleep (0.00000001)
g_clhs = []
for i in range (20):
i = [[], [], [], [], []]
g_clhs.append (i)
for i in tqdm_notebook (range (20), total = 20, desc = 'Loop 2'):
for j in range (100):
for k in range (5):
f = np.quantile (dataframes [k] ['values'], classic_lhs_samples [i][j].T [k])
g_clhs [i][k].append (f)
del f
time.sleep (0.00000001)
# Maximin LHS) — Maximin LHS sampling technique
def maximin_lhs (n, samples, iterations, lhstype, nreals, seed = None):
    '''Generate `nreals` maximin Latin hypercube designs, each of shape
    (samples, n), via _lhsmaximin with `iterations` candidate designs
    per realization.

    `seed` may be None (unseeded), a single int, or a list of nreals
    ints (one seed per realization).  Returns a list of designs.'''
    maximin_samples = []
    if seed is None:
        for _ in range (nreals):
            maximin_samples.append (_lhsmaximin (n, samples, iterations, lhstype))
    elif not isinstance (seed, list):
        # Single scalar seed: seed once, then draw every realization.
        # (The original drew one design here without appending it, so a
        # scalar seed returned an empty list.)
        np.random.seed (seed)
        for _ in range (nreals):
            maximin_samples.append (_lhsmaximin (n, samples, iterations, lhstype))
    else:
        # One seed per realization.
        for j in range (nreals):
            np.random.seed (seed [j])
            maximin_samples.append (_lhsmaximin (n, samples, iterations, lhstype))
    return maximin_samples
Maximin LHS¶list_of_reals = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 200, 300, 400, 500, 1000, 3000, 5000,
7000, 9000, 10000]
maximin_lhs_samples = []
iters = [100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,100, 100, 100, 10,
10, 10, 10, 10, 10]
reals_num = [100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 10, 10, 10, 10, 1, 1, 1, 1, 1, 1]
for i, j, u in tqdm_notebook (zip (list_of_reals, iters, reals_num), total = 20, desc = 'Loop 1'):
kkk = maximin_lhs (n = 5, samples = i, iterations = j, lhstype = 'maximin', nreals = u, seed = seeds)
maximin_lhs_samples.append (kkk)
del kkk
time.sleep (0.000000000001)
dataframes = []
for i in tqdm_notebook (range (5), total = 5, desc = 'Loop 1'):
v = zip (results [i][0], results [i][1])
x = pd.DataFrame (v, columns = ['values', 'cdf'])
dataframes.append (x)
del x
del v
time.sleep (0.00000001)
g_maxi_lhs = []
for i in range (20):
i = [[], [], [], [], []]
g_maxi_lhs.append (i)
for i in tqdm_notebook (range (20), total = 20, desc = 'Loop 2'):
if i >= 0 and i < 10:
for j in range (100):
for k in range (5):
f = np.quantile (dataframes [k] ['values'], maximin_lhs_samples [i][j].T [k])
g_maxi_lhs [i][k].append (f)
del f
elif i > 9 and i < 14:
for j in range (10):
for k in range (5):
ff = np.quantile (dataframes [k] ['values'], maximin_lhs_samples [i][j].T [k])
g_maxi_lhs [i][k].append (ff)
del ff
elif i > 13 and i < 20:
for j in range (1):
for k in range (5):
ffd = np.quantile (dataframes [k] ['values'], maximin_lhs_samples [i][j].T [k])
g_maxi_lhs [i][k].append (ffd)
del ffd
time.sleep (0.00000001)
LHSMDU)¶LHSMDU.EXE GSLIB program and its parameter file¶lhsmdu = gs.Program (exedir + 'lhsmdu.exe', getpar = True)
lhsmdupar = """ Parameters for LHSMDU
*********************
START OF PARAMETERS:
{nvar} - N number of variables
{nreals} - L number of realizations
5 - M realization initialization factor
{seed} - random number generator seed
Nofile
Nofile
{output_3}
Nofile
0 -consider correlation matrix? (0=no,1=yes)
1.000 0.000 0.000 0.000 0.000
0.000 1.000 0.000 0.000 0.000
0.000 0.000 1.000 0.000 0.000
0.000 0.000 0.000 1.000 0.000
0.000 0.000 0.000 0.000 1.000
"""
list_of_reals = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 200, 300, 400, 500, 1000, 3000, 5000,
7000, 9000, 10000]
for k, l in tqdm_notebook (zip (range (100), seeds), total = 100, desc = 'Loop 1'):
k = k + 1
for j in range (10, 110, 10):
lhsmdu.run (parstr = lhsmdupar.format (nvar = 5, nreals = j, seed = l,
output_3 = outdir + 'lhsmdu/lhsmdu_{}_{}.out'.format (k, j)),
liveoutput = False)
for k, l in tqdm_notebook (zip (range (10), seeds), total = 10, desc = 'Loop 1'):
k = k + 1
for j in range (200, 600, 100):
lhsmdu.run (parstr = lhsmdupar.format (nvar = 5, nreals = j, seed = l,
output_3 = outdir + 'lhsmdu/lhsmdu_{}_{}.out'.format (k, j)),
liveoutput = False)
for k, l in tqdm_notebook (zip (range (1), seeds), total = 1, desc = 'Loop 1'):
k = k + 1
for j in range (1000, 11000, 2000):
lhsmdu.run (parstr = lhsmdupar.format (nvar = 5, nreals = j, seed = l,
output_3 = outdir + 'lhsmdu/lhsmdu_{}_{}.out'.format (k, j)),
liveoutput = False)
for k, l in tqdm_notebook (zip (range (1), seeds), total = 1, desc = 'Loop 1'):
k = k + 1
for j in range (10000, 11000, 1000):
lhsmdu.run (parstr = lhsmdupar.format (nvar = 5, nreals = j, seed = l,
output_3 = outdir + 'lhsmdu/lhsmdu_{}_{}.out'.format (k, j)),
liveoutput = False)
time.sleep (0.00000001)
LHSMDU.EXE GSLIB program¶list_of_reals = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 200, 300, 400, 500, 1000, 3000, 5000,
7000, 9000, 10000]
lhsmdu_samp_1 = []
for i in tqdm_notebook (list_of_reals [0:10] , total = 10, desc = 'Loop 1'):
for j in range (100):
j = j + 1
c = gs.DataFile (outdir + 'lhsmdu/lhsmdu_{}_{}.out'.format (j, i))
lhsmdu_samp_1.append (c)
del c
time.sleep (0.00000001)
lhsmdu_samp_2 = []
for i in tqdm_notebook (list_of_reals[10:14], total = 4, desc = 'Loop 2'):
for j in range (10):
j = j + 1
b = gs.DataFile (outdir + 'lhsmdu/lhsmdu_{}_{}.out'.format (j, i))
lhsmdu_samp_2.append (b)
del b
time.sleep (0.00000001)
lhsmdu_samp_3 = []
for i in tqdm_notebook (list_of_reals[14:20], total = 6, desc = 'Loop 3'):
for j in range (1):
j = j + 1
bb = gs.DataFile (outdir + 'lhsmdu/lhsmdu_{}_{}.out'.format (j, i))
lhsmdu_samp_3.append (bb)
del bb
time.sleep (0.00000001)
lhsmdu_array_1 = []
for j in range (1000):
f = lhsmdu_samp_1 [j].data.to_numpy ()
lhsmdu_array_1.append (f)
del f
lhsmdu_array_2 = []
for j in range (40):
d = lhsmdu_samp_2 [j].data.to_numpy ()
lhsmdu_array_2.append (d)
del d
lhsmdu_array_3 = []
for j in range (6):
dp = lhsmdu_samp_3 [j].data.to_numpy ()
lhsmdu_array_3.append (dp)
del dp
first_array = [lhsmdu_array_1 [0:100], lhsmdu_array_1 [100:200], lhsmdu_array_1 [200:300], lhsmdu_array_1 [300:400],
lhsmdu_array_1 [400:500], lhsmdu_array_1 [500:600], lhsmdu_array_1 [600:700], lhsmdu_array_1 [700:800],
lhsmdu_array_1 [800:900], lhsmdu_array_1 [900:1000], lhsmdu_array_2 [0:10], lhsmdu_array_2 [10:20],
lhsmdu_array_2 [20:30], lhsmdu_array_2 [30:40], lhsmdu_array_3 [0:1], lhsmdu_array_3 [1:2],
lhsmdu_array_3 [2:3], lhsmdu_array_3 [3:4], lhsmdu_array_3 [4:5], lhsmdu_array_3 [5:6]]
dataframes = []
for i in tqdm_notebook (range (5), total = 5, desc = 'Loop 1'):
v = zip (results [i][0], results [i][1])
x = pd.DataFrame (v, columns = ['values', 'cdf'])
dataframes.append (x)
del x
del v
time.sleep (0.00000001)
g_mdu_lhs = []
for i in range (20):
i = [[], [], [], [], []]
g_mdu_lhs.append (i)
for i in tqdm_notebook (range (20), total = 20, desc = 'Loop 2'):
if i >= 0 and i < 10:
for j in range (100):
for k in range (5):
f = np.quantile (dataframes [k] ['values'], first_array [i][j].T [k])
g_mdu_lhs [i][k].append (f)
del f
elif i > 9 and i < 14:
for j in range (10):
for k in range (5):
ff = np.quantile (dataframes [k] ['values'], first_array [i][j].T [k])
g_mdu_lhs [i][k].append (ff)
del ff
elif i > 13 and i < 20:
for j in range (1):
for k in range (5):
ffd = np.quantile (dataframes [k] ['values'], first_array [i][j].T [k])
g_mdu_lhs [i][k].append (ffd)
del ffd
time.sleep (0.00000001)
# PPMT) — MCS sampling technique
def rand_sample (n, samples, nreals, seed = None):
    '''Draw uniform [0, 1) Monte Carlo samples.

    Parameters
    ----------
    n       : dimension (number of variables)
    samples : number of samples per realization
    nreals  : number of realizations
    seed    : None (unseeded), a single int seed, or a list of nreals
              int seeds (one per realization)

    Returns
    -------
    list of nreals arrays, each of shape (samples, n).'''
    u_list = []
    if seed is None:
        for _ in range (nreals):
            u_list.append (np.random.rand (samples, n))
    elif not isinstance (seed, list):
        # Single scalar seed: seed once, then draw every realization
        # from that stream.  (The original drew one array here and
        # never appended it, so a scalar seed returned an empty list.)
        np.random.seed (seed)
        for _ in range (nreals):
            u_list.append (np.random.rand (samples, n))
    else:
        # One seed per realization for reproducible independent draws.
        for j in range (nreals):
            np.random.seed (seed [j])
            u_list.append (np.random.rand (samples, n))
    return u_list
MCS¶list_of_reals = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 200, 300, 400, 500, 1000, 3000, 5000,
7000, 9000, 10000]
ppmt_samples = []
for j in tqdm_notebook (list_of_reals [0:17], total = 17, desc = 'Loop 1'):
cc = rand_sample (n = 5, samples = j, nreals = 100, seed = seeds)
ppmt_samples.append (cc)
del cc
time.sleep (0.000000000001)
for j in tqdm_notebook (list_of_reals [17:20], total = 3, desc = 'Loop 2'):
ccc = rand_sample (n = 5, samples = j, nreals = 10, seed = seeds)
ppmt_samples.append (ccc)
del ccc
time.sleep (0.000000000001)
columns = []
for j in tqdm_notebook (range (5), total = 5, desc = 'Loop 1'):
j = j + 1
x = 'var{}'.format (j)
columns.append (x)
del x
time.sleep (0.00001)
columns
ppmt_input_inv_cdf = []
for i in range (20):
i = []
ppmt_input_inv_cdf.append (i)
list_of_reals = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 200, 300, 400, 500, 1000, 3000, 5000,
7000, 9000, 10000]
for i in tqdm_notebook (range (0, 17, 1), total = 17, desc = 'Loop 1'):
for j in range (100):
a = stats.norm.ppf (ppmt_samples [i][j])
ppmt_input_inv_cdf [i].append (a)
del a
time.sleep (0.000001)
for i in tqdm_notebook (range (17, 20, 1), total = 3, desc = 'Loop 2'):
for j in range (10):
b = stats.norm.ppf (ppmt_samples [i][j])
ppmt_input_inv_cdf [i].append (b)
del b
time.sleep (0.000001)
for i, k in tqdm_notebook (zip (range (20), list_of_reals) , total = 20, desc = 'Loop 3'):
if i >=0 and i < 17:
for j, h in zip (range (100), range (100)):
h = h + 1
d = pd.DataFrame (ppmt_input_inv_cdf [i][j], columns = columns)
gs.write_gslib (d, outdir + 'ppmt/datafl_{}_{}.out'.format (h, k))
del d
elif i >=17 and i < 20:
for j, h in zip (range (10), range (10)):
h = h + 1
dd = pd.DataFrame (ppmt_input_inv_cdf [i][j], columns = columns)
gs.write_gslib (dd, outdir + 'ppmt/datafl_{}_{}.out'.format (h, k))
del dd
time.sleep (0.000001)
PPMT.EXE GSLIB program and its parameter file¶ppmt = gs.Program (program = exedir + 'ppmt.exe', getpar = True)
ppmtparf = """ Parameters for PPMT
*******************
START OF PARAMETERS:
{datafl} - input data file
5 1 2 3 4 5 0 - number of variables, variable cols, and wt col
-1.0e7 1.0e7 - trimming limits
25 50 50 - min/max iterations and targeted Gauss perc. (see Note 1)
0 - spatial decorrelation? (0=no,1=yes) (see Note 2)
0 0 0 - x, y, z columns (0=none for z)
0 0 - lag distance, lag tolerance
{output_1} - output data file
Nofilr - output transformation table (binary)
Note 1: Optional stopping criteria, where the projection pursuit algorithm will terminate
after reaching the targetted Gaussian percentile. The input percentile range is 1 (very Gaussian)
to 99 (barely Gaussian); the percentiles are calculated using random Gaussian distributions.
The min/max iterations overrides the targetted Gaussian percentile.
Note 2: Option to apply min/max autocorrelation factors after the projection pursuit algorithm
to decorrelate the variables at the specified non-zero lag distance.
"""
list_of_reals = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 200, 300, 400, 500, 1000, 3000, 5000,
7000, 9000, 10000]
for j in tqdm_notebook (range (100), total = 100, desc = 'Loop 1'):
j = j + 1
for i in list_of_reals [0: 17]:
ppmt.run (parstr = ppmtparf.format (datafl = outdir + 'ppmt/datafl_{}_{}.out'.format (j, i)
, output_1 = outdir + 'ppmt/ppmt_1/ppmt_{}_{}.out'.format (j, i)),
liveoutput = False)
time.sleep (0.000001)
for j in tqdm_notebook (range (10), total = 10, desc = 'Loop 2'):
j = j + 1
for i in list_of_reals [17:20]:
ppmt.run (parstr = ppmtparf.format (datafl = outdir + 'ppmt/datafl_{}_{}.out'.format (j, i)
, output_1 = outdir + 'ppmt/ppmt_1/ppmt_{}_{}.out'.format (j, i)),
liveoutput = False)
time.sleep (0.000001)
list_of_reals = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 200, 300, 400, 500, 1000, 3000, 5000,
7000, 9000, 10000]
ppmt_samp = []
for i in range (20):
i = []
ppmt_samp.append (i)
for i, j in tqdm_notebook (zip (list_of_reals [0:17], range (17)), total = 17, desc = 'Loop 1'):
for k in range (100):
k = k + 1
a = gs.DataFile (outdir + 'ppmt/ppmt_1/ppmt_{}_{}.out'.format (k, i), readfl = True)
if a.data.isnull().values.any() == True:
print (i, j, k)
ppmt_samp [j].append (a)
del a
time.sleep (0.00000000001)
for i, j in tqdm_notebook (zip (list_of_reals [17:20], range (17, 20, 1)), total = 3, desc = 'Loop 2'):
for k in range (10):
k = k + 1
a = gs.DataFile (outdir + 'ppmt/ppmt_1/ppmt_{}_{}.out'.format (k, i), readfl = True)
if a.data.isnull().values.any() == True:
print (i, j, k)
ppmt_samp [j].append (a)
del a
time.sleep (0.00000000001)
ppmt_trans = []
for i in range (20):
i = []
ppmt_trans.append (i)
for i in tqdm_notebook (range (17) , total = 17, desc = 'Loop 1'):
for j in range (100):
for k in range (5):
k = k + 1
ppmt = stats.norm.cdf (ppmt_samp [i][j].data ['PPMT:var{}'.format (k)])
ppmt_trans [i].append (ppmt)
del ppmt
time.sleep (0.0000000001)
for i in tqdm_notebook (range (17, 20, 1) , total = 3, desc = 'Loop 2'):
for j in range (10):
for k in range (5):
k = k + 1
ppmt = stats.norm.cdf (ppmt_samp [i][j].data ['PPMT:var{}'.format (k)])
ppmt_trans [i].append (ppmt)
del ppmt
time.sleep (0.0000000001)
stacked = []
for i in range (20):
i = []
stacked.append (i)
for i in tqdm_notebook (range (17), total = 17, desc = 'Loop 1'):
for j, k in zip (range (0, 500, 5), range (5, 505, 5)):
a = np.stack (ppmt_trans [i][j:k], axis = 1)
stacked [i].append (a)
del a
time.sleep (0.000000001)
for i in tqdm_notebook (range (17, 20, 1), total = 3, desc = 'Loop 2'):
for j, k in zip (range (0, 50, 5), range (5, 55, 5)):
a = np.stack (ppmt_trans [i][j:k], axis = 1)
stacked [i].append (a)
del a
time.sleep (0.000000001)
ppmt_s = []
for i in tqdm_notebook (range (20), total = 20, desc = 'Loop 3'):
s = stacked [i]
ppmt_s.append (s)
del s
time.sleep (0.000000001)
dataframes = []
for i in tqdm_notebook (range (5), total = 5, desc = 'Loop 1'):
v = zip (results [i][0], results [i][1])
x = pd.DataFrame (v, columns = ['values', 'cdf'])
dataframes.append (x)
del x
time.sleep (0.00000001)
g_ppmt_reals = []
for i in range (20):
i = [[], [], [], [], []]
g_ppmt_reals.append (i)
for i in tqdm_notebook (range (17), total = 17, desc = 'Loop 2'):
for j in range (100):
for k in range (5):
f = np.quantile (dataframes [k] ['values'], ppmt_s [i][j].T [k])
g_ppmt_reals [i][k].append (f)
del f
time.sleep (0.00000001)
for i in tqdm_notebook (range (17, 20, 1), total = 3, desc = 'Loop 3'):
for j in range (10):
for k in range (5):
f = np.quantile (dataframes [k] ['values'], ppmt_s [i][j].T [k])
g_ppmt_reals [i][k].append (f)
del f
time.sleep (0.00000001)
Now, we consider that the five variables are linearly correlated with one another. The strength of the correlation between variables is given by the following matrix $\textrm{A}$:
\begin{equation*} \textbf{A} = \begin{bmatrix} 1 & 0 & 0 & 0 & 0 \\ 0 & 1 & 0.3 & 0.25 & -0.4 \\ 0 & 0.3 & 1 & 0.4 & -0.5 \\ 0 & 0.25 & 0.4 & 1 & -0.6 \\ 0 & -0.4 & -0.5 & -0.6 & 1 \end{bmatrix}_{ 5\times 5} \end{equation*}a = zip (area, thick, net_oil, net_poros, water_sat)
population_data = pd.DataFrame (a, columns = ['Area', 'Thickness', 'Net oil to gross volume', 'Net porosity',
'Water saturation'])
population_data.head ()
mpl.style.use('default')
plt.figure (figsize = (24, 12))
plt.subplot (3, 4, 1)
g = sns.scatterplot (x = f'Area', y = f'Thickness', data = population_data, s = 5)
plt.subplot (3, 4, 2)
g = sns.scatterplot (x = f'Area', y = f'Net oil to gross volume', data = population_data, s = 5)
plt.subplot (3, 4, 3)
g = sns.scatterplot (x = f'Area', y = f'Net porosity', data = population_data, s = 5)
plt.subplot (3, 4, 4)
g = sns.scatterplot (x = f'Area', y = f'Water saturation', data = population_data, s = 5)
plt.subplot (3, 4, 5)
g = sns.scatterplot (x = f'Thickness', y = f'Net oil to gross volume', data = population_data, s = 5)
plt.subplot (3, 4, 6)
g = sns.scatterplot (x = f'Thickness', y = f'Net porosity', data = population_data, s = 5)
plt.subplot (3, 4, 7)
g = sns.scatterplot (x = f'Thickness', y = f'Water saturation', data = population_data, s = 5)
plt.subplot (3, 4, 8)
g = sns.scatterplot (x = f'Net oil to gross volume', y = f'Net porosity', data = population_data, s = 5)
plt.subplot (3, 4, 9)
g = sns.scatterplot (x = f'Net oil to gross volume', y = f'Water saturation', data = population_data, s = 5)
plt.subplot (3, 4, 10)
g = sns.scatterplot (x = f'Net porosity', y = f'Water saturation', data = population_data, s = 5)
plt.subplots_adjust (left = 0.125, bottom = 0.1, right = 0.75, top = 0.9, wspace = 0.35, hspace = 0.35)
plt.savefig (outdir + 'figures/figure_4.png', bbox_inches = 'tight', dpi = 300)
plt.show ()
# Build the 5x5 covariance (correlation) matrix for the correlated case.
variance = [1, 1, 1, 1, 1]
# Upper-triangle entries in row order:
# (1,2) (1,3) (1,4) (1,5) (2,3) (2,4) (2,5) (3,4) (3,5) (4,5)
corre = [0, 0, 0, 0, 0.3, 0.25, -0.4, 0.4, -0.5, -0.6]
cov_matrix_fvar = cov_mat (var = variance, corr = corre)
# Cholesky factors; the lower factor L is applied below to impose the
# correlation structure on independent standard-Gaussian draws.
l_matrix_ = scipy.linalg.cholesky (cov_matrix_fvar, lower = True)
pprint.pprint (l_matrix_)
u_matrix_ = scipy.linalg.cholesky (cov_matrix_fvar, lower = False)
pprint.pprint (u_matrix_)
1 seed value is required.
seeds = SeedVal (datafl = 'seed.csv', colname = 'seed_nums')
print ('The seed values for the specified number of realizations are given as follows: ', seeds)
# MCS sampling technique
def rand_sample (n, samples, nreals, seed = None):
    '''Draw uniform [0, 1) Monte Carlo samples.

    Parameters
    ----------
    n       : dimension (number of variables)
    samples : number of samples per realization
    nreals  : number of realizations
    seed    : None (unseeded), a single int seed, or a list of nreals
              int seeds (one per realization)

    Returns
    -------
    list of nreals arrays, each of shape (samples, n).'''
    u_list = []
    if seed is None:
        for _ in range (nreals):
            u_list.append (np.random.rand (samples, n))
    elif not isinstance (seed, list):
        # Single scalar seed: seed once, then draw every realization
        # from that stream.  (The original drew one array here and
        # never appended it, so a scalar seed returned an empty list.)
        np.random.seed (seed)
        for _ in range (nreals):
            u_list.append (np.random.rand (samples, n))
    else:
        # One seed per realization for reproducible independent draws.
        for j in range (nreals):
            np.random.seed (seed [j])
            u_list.append (np.random.rand (samples, n))
    return u_list
pp = rand_sample (n = 5, samples = 10000000, nreals = 1, seed = seeds)
pp
pp_inv = stats.norm.ppf (pp)
pp_inv [0]
popul_corr_gaus = np.matmul (l_matrix_, pp_inv [0].T)
popul_corr_gaus
popul_corr_quan = stats.norm.cdf (popul_corr_gaus )
popul_corr_quan [0]
popul_core = []
for i, j in tqdm_notebook (zip (range (5), range (5)), total = 5, desc = 'Loop 1'):
gg = np.quantile (dataframes [i] ['values'], popul_corr_quan [j])
popul_core.append (gg)
time.sleep (0.00000000001)
ss = zip (popul_core [0], popul_core [1], popul_core [2], popul_core [3], popul_core [4])
population_data_core = pd.DataFrame (ss, columns = ['Area', 'Thickness', 'Net oil to gross volume', 'Net porosity',
'Water saturation'])
population_data_core.head ()
mpl.style.use('default')
plt.figure (figsize = (24, 12))
plt.subplot (3, 4, 1)
g = sns.scatterplot (x = f'Area', y = f'Thickness', data = population_data_core, s = 5)
plt.subplot (3, 4, 2)
g = sns.scatterplot (x = f'Area', y = f'Net oil to gross volume', data = population_data_core, s = 5)
plt.subplot (3, 4, 3)
g = sns.scatterplot (x = f'Area', y = f'Net porosity', data = population_data_core, s = 5)
plt.subplot (3, 4, 4)
g = sns.scatterplot (x = f'Area', y = f'Water saturation', data = population_data_core, s = 5)
plt.subplot (3, 4, 5)
g = sns.scatterplot (x = f'Thickness', y = f'Net oil to gross volume', data = population_data_core, s = 5)
plt.subplot (3, 4, 6)
g = sns.scatterplot (x = f'Thickness', y = f'Net porosity', data = population_data_core, s = 5)
plt.subplot (3, 4, 7)
g = sns.scatterplot (x = f'Thickness', y = f'Water saturation', data = population_data_core, s = 5)
plt.subplot (3, 4, 8)
g = sns.scatterplot (x = f'Net oil to gross volume', y = f'Net porosity', data = population_data_core, s = 5)
plt.subplot (3, 4, 9)
g = sns.scatterplot (x = f'Net oil to gross volume', y = f'Water saturation', data = population_data_core, s = 5)
plt.subplot (3, 4, 10)
g = sns.scatterplot (x = f'Net porosity', y = f'Water saturation', data = population_data_core, s = 5)
plt.subplots_adjust (left = 0.125, bottom = 0.1, right = 0.75, top = 0.9, wspace = 0.35, hspace = 0.35)
plt.savefig (outdir + 'figures/figure_5.png', bbox_inches = 'tight', dpi = 300)
plt.show ()
def ecdf(data):
    """Empirical CDF of a one-dimensional sample.

    Returns a pair (x, y): the sample values sorted ascending, and the
    cumulative probabilities i/n for i = 1, ..., n.
    """
    sorted_vals = np.sort (data)
    n_obs = len (sorted_vals)
    probs = np.arange (1, n_obs + 1) / n_obs
    return sorted_vals, probs
popul_array_core = population_data_core.to_numpy ()
popul_array_core.T [0]
# One (values, cdf) ECDF pair per variable column of the population array.
results_core = []
for col in tqdm_notebook (range (5), total = 5, desc = 'Loop 1'):
    results_core.append (ecdf (popul_array_core.T [col]))
    time.sleep (0.0000001)
mpl.style.use('default')
fig, axes = plt.subplots(2, 3, figsize = (20, 8))
# One spec per panel: (row, col), x label, y label, title, x limits, x ticks.
# FIX: the original called plt.xlim/plt.ylim, which act on the *current* axes
# (the last subplot created, which is deleted below) rather than on each panel;
# limits are now set on the panel's own axes.
panel_specs = [
    ((0, 0), 'Area of deposit ($A$)', 'F(A)',
     'Triangular; \n a = 2, b = 4, c = 6', (2, 6), np.arange(2,7,1)),
    ((0, 1), 'Thickness of deposit ($T$)', 'F(T)',
     'Gaussian; \n m = 10, \u03C3 = 1', (4, 15), np.arange(4,16,1)),
    ((0, 2), 'Net oil to gross volume ($NTG$)', 'F(NTG)',
     'Uniform; \n a = 0.6, b = 0.8', (0.6, 0.85), np.arange(0.6,0.85,0.05)),
    ((1, 0), 'Net porosity (\u03A6'+'$_{net}$)', 'F(\u03A6'+'$_{net}$)',
     'Triangular; \n a = 0.15, b = 0.25, c = 0.35', (0.15, 0.4), np.arange(0.15,0.4,0.05)),
    ((1, 1), 'Water saturation '+ '($S_{w}$)', 'F($S_{w}$)',
     'Triangular; \n a = 0.15, b = 0.2, c = 0.3', (0.15, 0.35), np.arange(0.15,0.35,0.05)),
]
for idx, ((r, c), xlab, ylab, title, xlim, xticks) in enumerate (panel_specs):
    ax = axes [r, c]
    ax.plot (results_core [idx][0], results_core [idx][1], linestyle = '-', linewidth = 2)
    ax.set (xlabel = xlab, ylabel = ylab)
    ax.set_title (title)
    ax.set_xlim (*xlim)
    ax.set_ylim (-0.02, 1.02)
    ax.set_yticks (np.arange(0,1.1,0.1))
    ax.set_xticks (xticks)
fig.delaxes (axes[1][2])
plt.subplots_adjust (left = 0.125, bottom = 0.1, right = 0.75, top = 0.9, wspace = 0.35, hspace = 0.45)
plt.savefig (outdir + 'figures/figure_6.png', bbox_inches = 'tight', dpi = 300)
plt.show ()
# OOIP transfer function: A * T * NTG * Phi_net * (1 - Sw); `constant` is the
# unit-conversion factor, kept at 1 here so results stay in consistent units.
constant = 1
ooip_core = constant * population_data_core ['Area'] * population_data_core ['Thickness'] * population_data_core ['Net oil to gross volume'] * (population_data_core ['Net porosity'] * (1 - population_data_core ['Water saturation']))
ooip_core
# Reference ("true") OOIP distribution computed over the whole population;
# x_popul_core / y_popul_core are reused later when scoring each sampler.
x_popul_core, y_popul_core = ecdf (ooip_core)
mpl.style.use('default')
fig, ax = plt.subplots(figsize = (4, 4.1))
ax.plot(x_popul_core, y_popul_core, linestyle = '-', linewidth = 2)
# Single-axes figure: plt.* calls below target `ax` (the current axes).
plt.xlabel('OOIP')
plt.ylabel('F(OOIP)')
plt.xlim (0, 17)
plt.ylim (-0.02, 1.02)
ax.set_yticks (np.arange(0,1.1,0.1))
ax.set_xticks (np.arange(0,17,2))
ax.grid (color = 'gray', linestyle = '--', linewidth = 0.6)
# plt.margins(0.02) # keep data off plot edges
plt.subplots_adjust (left = 0.125, bottom = 0.1, right = 0.75, top = 0.9, wspace = 0.35, hspace = 0.45)
plt.savefig (outdir + 'figures/figure_7.png', bbox_inches = 'tight', dpi = 300)
plt.show ()
def _inverse_gauss (samples, desc):
    """Map uniform [0, 1] samples to standard-normal scores via the inverse CDF.

    `samples` holds 20 entries (one per realization size); returns the list of
    their normal-score counterparts. `desc` labels the progress bar.
    """
    out = []
    for i in tqdm_notebook (range (20), total = 20, desc = desc):
        out.append (stats.norm.ppf (samples [i]))
        time.sleep (0.00000001)
    return out

# The same transform applied to every sampler's uniform samples; the original
# repeated this loop five times verbatim.
inv_mcs = _inverse_gauss (mcs_samples, 'Loop 1')
inv_clhs = _inverse_gauss (classic_lhs_samples, 'Loop 2')
inv_maximin = _inverse_gauss (maximin_lhs_samples, 'Loop 3')
inv_lhsmdu = _inverse_gauss (first_array, 'Loop 4')
inv_ppmt = _inverse_gauss (ppmt_s, 'Loop 5')
def _impose_correlation (inv_vals, schedule, first_loop):
    """Impose the target correlation by rotating independent normal scores with
    the lower-triangular Cholesky factor `l_matrix_`.

    `schedule` is a list of (index_range, n_sets) pairs: for the larger
    realization sizes only a reduced number of realization sets is available
    (100 / 10 / 1 in the original loops). `first_loop` numbers the progress
    bars consistently with the original output. Always returns 20 sublists,
    leaving unscheduled indices empty.
    """
    out = [[] for _ in range (20)]
    loop_no = first_loop
    for idx_range, n_sets in schedule:
        for i in tqdm_notebook (idx_range, total = len (idx_range),
                                desc = 'Loop {}'.format (loop_no)):
            for j in range (n_sets):
                out [i].append (np.matmul (l_matrix_, inv_vals [i][j].T))
            time.sleep (0.0000000001)
        loop_no += 1
    return out

# MCS and CLHS have 100 realization sets for every size; the LHS variants and
# PPMT taper off for the expensive large sizes.
full_schedule = [(range (20), 100)]
tiered_schedule = [(range (10), 100), (range (10, 14), 10), (range (14, 20), 1)]
mcs_corr = _impose_correlation (inv_mcs, full_schedule, 1)
clhs_corr = _impose_correlation (inv_clhs, full_schedule, 2)
maximin_corr = _impose_correlation (inv_maximin, tiered_schedule, 3)
lhsmdu_corr = _impose_correlation (inv_lhsmdu, tiered_schedule, 6)
ppmt_corr = _impose_correlation (inv_ppmt, [(range (17), 100), (range (17, 20), 10)], 9)
def _back_to_uniform (corr_vals, schedule, first_loop):
    """Forward Gaussian transform: correlated normal scores -> uniform [0, 1].

    Mirrors the schedule used when the correlation was imposed; see
    `schedule` = list of (index_range, n_sets). Returns 20 sublists, each
    holding the transposed uniform arrays for that realization size.
    """
    out = [[] for _ in range (20)]
    loop_no = first_loop
    for idx_range, n_sets in schedule:
        for i in tqdm_notebook (idx_range, total = len (idx_range),
                                desc = 'Loop {}'.format (loop_no)):
            for j in range (n_sets):
                out [i].append (stats.norm.cdf (corr_vals [i][j]).T)
        loop_no += 1
    return out

# Same 100 / 10 / 1 tapering as the correlation-imposition step; the original
# spelled these out as ten near-identical loops.
mcs_corr_forw = _back_to_uniform (mcs_corr, [(range (20), 100)], 1)
clhs_corr_forw = _back_to_uniform (clhs_corr, [(range (20), 100)], 2)
maximin_corr_forw = _back_to_uniform (
    maximin_corr, [(range (10), 100), (range (10, 14), 10), (range (14, 20), 1)], 3)
lhsmdu_corr_forw = _back_to_uniform (
    lhsmdu_corr, [(range (10), 100), (range (10, 14), 10), (range (14, 20), 1)], 6)
ppmt_corr_forw = _back_to_uniform (
    ppmt_corr, [(range (17), 100), (range (17, 20), 10)], 9)
# Per-variable ECDF lookup tables: one DataFrame of (values, cdf) per variable.
dataframes_core = []
for i in tqdm_notebook (range (5), total = 5, desc = 'Loop 1'):
    dataframes_core.append (pd.DataFrame (zip (results_core [i][0], results_core [i][1]),
                                          columns = ['values', 'cdf']))
    time.sleep (0.00000001)

# Back-transform each correlated uniform MCS realization set to data units:
# g_corr[size][variable] collects the quantile-mapped arrays.
# (The original built the nested lists by rebinding the loop index `i = []`.)
g_corr = [[[], [], [], [], []] for _ in range (20)]
for i in tqdm_notebook (range (20), total = 20, desc = 'Loop 2'):
    for j in range (100):
        for k in range (5):
            g_corr [i][k].append (np.quantile (dataframes_core [k] ['values'],
                                               mcs_corr_forw [i][j].T [k]))
        time.sleep (0.00000001)

# OOIP transfer function A * T * NTG * Phi_net * (1 - Sw) per realization.
g_reals_corr = [[] for _ in range (20)]
for j in tqdm_notebook (range (20), total = 20, desc = 'Loop 1'):
    for i in range (100):
        g_reals_corr [j].append (g_corr[j][0][i] * g_corr[j][1][i] * g_corr[j][2][i]
                                 * g_corr[j][3][i] * (1 - g_corr[j][4][i]))
        time.sleep (0.0000000001)
The e values are computed over the probabilities $p = 0.1, 0.2, \ldots, 0.9$.
# Deciles at which the sampled and true OOIP distributions are compared.
q = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
# The true quantiles are invariant over all realizations: hoisted out of the
# loops (the original recomputed them 2000 times).
true = np.quantile (x_popul_core, q)
# e value = max absolute decile difference, per realization set.
list_dif_corr = [[] for _ in range (20)]
for i in tqdm_notebook (range (20), total = 20, desc = 'Loop 1'):
    for j in range (100):
        sample = np.quantile (g_reals_corr [i][j], q)
        list_dif_corr [i].append (np.max (abs (true - sample)))
        time.sleep (0.000000001)
# Mean e value per realization size for MCS.
mcs_diff_corr = [np.mean (list_dif_corr [i])
                 for i in tqdm_notebook (range (20), total = 20, desc = 'Loop 2')]
# Rebuild the per-variable ECDF lookup tables (same as the MCS section).
dataframes_core = []
for i in tqdm_notebook (range (5), total = 5, desc = 'Loop 1'):
    dataframes_core.append (pd.DataFrame (zip (results_core [i][0], results_core [i][1]),
                                          columns = ['values', 'cdf']))
    time.sleep (0.00000001)

# Back-transform the correlated CLHS uniforms to data units.
g_clhs_corr = [[[], [], [], [], []] for _ in range (20)]
for i in tqdm_notebook (range (20), total = 20, desc = 'Loop 2'):
    for j in range (100):
        for k in range (5):
            g_clhs_corr [i][k].append (np.quantile (dataframes_core [k] ['values'],
                                                    clhs_corr_forw [i][j].T [k]))
        time.sleep (0.00000001)

# OOIP per CLHS realization.
g_reals_clhs_corr = [[] for _ in range (20)]
for j in tqdm_notebook (range (20), total = 20, desc = 'Loop 1'):
    for i in range (100):
        g_reals_clhs_corr [j].append (g_clhs_corr[j][0][i] * g_clhs_corr[j][1][i]
                                      * g_clhs_corr[j][2][i] * g_clhs_corr[j][3][i]
                                      * (1 - g_clhs_corr[j][4][i]))
        time.sleep (0.0000000001)

# e values: max absolute decile difference vs. the true OOIP distribution.
q = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
true = np.quantile (x_popul_core, q)  # loop-invariant, hoisted
list_dif_clhs_corr = [[] for _ in range (20)]
for i in tqdm_notebook (range (20), total = 20, desc = 'Loop 1'):
    for j in range (100):
        sample = np.quantile (g_reals_clhs_corr [i][j], q)
        list_dif_clhs_corr [i].append (np.max (abs (true - sample)))
        time.sleep (0.000001)
# Mean e value per realization size for CLHS.
clhs_diff_corr = [np.mean (list_dif_clhs_corr [i])
                  for i in tqdm_notebook (range (20), total = 20, desc = 'Loop 2')]
# Rebuild the per-variable ECDF lookup tables.
dataframes_core = []
for i in tqdm_notebook (range (5), total = 5, desc = 'Loop 1'):
    dataframes_core.append (pd.DataFrame (zip (results_core [i][0], results_core [i][1]),
                                          columns = ['values', 'cdf']))
    time.sleep (0.00000001)

def _n_sets_maximin (i):
    """Realization sets available at size index i for Maximin LHS:
    100 for the first ten sizes, 10 for sizes 10-13, 1 for the largest."""
    if i < 10:
        return 100
    if i < 14:
        return 10
    return 1

# Back-transform the correlated Maximin-LHS uniforms to data units.
# (The original triplicated this loop body for the 100 / 10 / 1 tiers.)
g_maxi_lhs_core = [[[], [], [], [], []] for _ in range (20)]
for i in tqdm_notebook (range (20), total = 20, desc = 'Loop 2'):
    for j in range (_n_sets_maximin (i)):
        for k in range (5):
            g_maxi_lhs_core [i][k].append (np.quantile (dataframes_core [k] ['values'],
                                                        maximin_corr_forw [i][j].T [k]))
    time.sleep (0.00000001)

# OOIP per Maximin-LHS realization.
g_reals_maxi_corr = [[] for _ in range (20)]
for j in tqdm_notebook (range (20), total = 20, desc = 'Loop 1'):
    for i in range (_n_sets_maximin (j)):
        g_reals_maxi_corr [j].append (g_maxi_lhs_core[j][0][i] * g_maxi_lhs_core[j][1][i]
                                      * g_maxi_lhs_core[j][2][i] * g_maxi_lhs_core[j][3][i]
                                      * (1 - g_maxi_lhs_core[j][4][i]))
    time.sleep (0.0000000001)

# e values vs. the true OOIP distribution.
q = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
true = np.quantile (x_popul_core, q)  # loop-invariant, hoisted
list_dif_maxi_corr = [[] for _ in range (20)]
for i in tqdm_notebook (range (20), total = 20, desc = 'Loop 1'):
    for j in range (_n_sets_maximin (i)):
        sample = np.quantile (g_reals_maxi_corr [i][j], q)
        list_dif_maxi_corr [i].append (np.max (abs (true - sample)))
    time.sleep (0.000001)
# Mean e value per realization size for Maximin LHS.
maximin_diff_corr = [np.mean (list_dif_maxi_corr [i])
                     for i in tqdm_notebook (range (20), total = 20, desc = 'Loop 2')]
# Rebuild the per-variable ECDF lookup tables.
dataframes_core = []
for i in tqdm_notebook (range (5), total = 5, desc = 'Loop 1'):
    dataframes_core.append (pd.DataFrame (zip (results_core [i][0], results_core [i][1]),
                                          columns = ['values', 'cdf']))
    time.sleep (0.00000001)

def _n_sets_lhsmdu (i):
    """Realization sets available at size index i for LHSMDU:
    100 for the first ten sizes, 10 for sizes 10-13, 1 for the largest."""
    if i < 10:
        return 100
    if i < 14:
        return 10
    return 1

# Back-transform the correlated LHSMDU uniforms to data units.
# (The original triplicated this loop body for the 100 / 10 / 1 tiers.)
g_mdu_lhs_corr = [[[], [], [], [], []] for _ in range (20)]
for i in tqdm_notebook (range (20), total = 20, desc = 'Loop 2'):
    for j in range (_n_sets_lhsmdu (i)):
        for k in range (5):
            g_mdu_lhs_corr [i][k].append (np.quantile (dataframes_core [k] ['values'],
                                                       lhsmdu_corr_forw [i][j].T [k]))
    time.sleep (0.00000001)

# OOIP per LHSMDU realization.
g_reals_mdulsh_corr = [[] for _ in range (20)]
for j in tqdm_notebook (range (20), total = 20, desc = 'Loop 1'):
    for i in range (_n_sets_lhsmdu (j)):
        g_reals_mdulsh_corr [j].append (g_mdu_lhs_corr[j][0][i] * g_mdu_lhs_corr[j][1][i]
                                        * g_mdu_lhs_corr[j][2][i] * g_mdu_lhs_corr[j][3][i]
                                        * (1 - g_mdu_lhs_corr[j][4][i]))
    time.sleep (0.0000000001)

# e values vs. the true OOIP distribution.
q = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
true = np.quantile (x_popul_core, q)  # loop-invariant, hoisted
list_dif_mdu_corr = [[] for _ in range (20)]
for i in tqdm_notebook (range (20), total = 20, desc = 'Loop 1'):
    for j in range (_n_sets_lhsmdu (i)):
        sample = np.quantile (g_reals_mdulsh_corr [i][j], q)
        list_dif_mdu_corr [i].append (np.max (abs (true - sample)))
    time.sleep (0.000001)
# Mean e value per realization size for LHSMDU.
lhsmdu_diff_corr = [np.mean (list_dif_mdu_corr [i])
                    for i in tqdm_notebook (range (20), total = 20, desc = 'Loop 2')]
# Rebuild the per-variable ECDF lookup tables.
dataframes_core = []
for i in tqdm_notebook (range (5), total = 5, desc = 'Loop 1'):
    dataframes_core.append (pd.DataFrame (zip (results_core [i][0], results_core [i][1]),
                                          columns = ['values', 'cdf']))
    time.sleep (0.00000001)

def _n_sets_ppmt (i):
    """Realization sets available at size index i for PPMT:
    100 below index 17, 10 for the three largest sizes."""
    return 100 if i < 17 else 10

# Back-transform the correlated PPMT uniforms to data units.
# (Replaces two duplicated loops; also the original's `i > 16 and i < 21`
# bound was off — i never exceeds 19.)
g_ppmt_reals_core = [[[], [], [], [], []] for _ in range (20)]
for i in tqdm_notebook (range (20), total = 20, desc = 'Loop 2'):
    for j in range (_n_sets_ppmt (i)):
        for k in range (5):
            g_ppmt_reals_core [i][k].append (np.quantile (dataframes_core [k] ['values'],
                                                          ppmt_corr_forw [i][j].T [k]))
    time.sleep (0.00000001)

# OOIP per PPMT realization.
g_reals_ppmt_samp_corr = [[] for _ in range (20)]
for j in tqdm_notebook (range (20), total = 20, desc = 'Loop 1'):
    for i in range (_n_sets_ppmt (j)):
        g_reals_ppmt_samp_corr [j].append (
            g_ppmt_reals_core[j][0][i] * g_ppmt_reals_core[j][1][i]
            * g_ppmt_reals_core[j][2][i] * g_ppmt_reals_core[j][3][i]
            * (1 - g_ppmt_reals_core[j][4][i]))
    time.sleep (0.0000000001)

# e values vs. the true OOIP distribution.
q = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
true = np.quantile (x_popul_core, q)  # loop-invariant, hoisted
list_dif_ppmt_corr = [[] for _ in range (20)]
for i in tqdm_notebook (range (20), total = 20, desc = 'Loop 1'):
    for j in range (_n_sets_ppmt (i)):
        sample = np.quantile (g_reals_ppmt_samp_corr [i][j], q)
        list_dif_ppmt_corr [i].append (np.max (abs (true - sample)))
# Mean e value per realization size for PPMT.
ppmt_diff_corr = [np.mean (list_dif_ppmt_corr [i])
                  for i in tqdm_notebook (range (20), total = 20, desc = 'Loop 2')]
# Realization counts evaluated, and the mean e value per sampler at each count,
# assembled into a single comparison table.
list_of_reals = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 200, 300, 400, 500,
                 1000, 3000, 5000, 7000, 9000, 10000]
e_vals_depen = pd.DataFrame (
    zip (list_of_reals, mcs_diff_corr, clhs_diff_corr, maximin_diff_corr,
         lhsmdu_diff_corr, ppmt_diff_corr),
    columns = ['reals', 'mcs_diff', 'clhs_diff', 'maximin_diff', 'lhsmdu_diff', 'ppmt_diff'])
e_vals_depen.head ()
$\textrm{e values} = a \cdot \textrm{num of reals}^{b}$\ $\log(\textrm{e values}) = \log(a \cdot \textrm{num of reals}^{b})$\ $\log(\textrm{e values}) = \log(a) + b \cdot \log(\textrm{num of reals})$, which is the linear form of the power function.
# Fit log(e) = log(a) + b * log(reals) by OLS for each sampler, i.e. the
# linearized power-law model of e value vs. number of realizations.
list_vals = ['mcs_diff', 'clhs_diff', 'maximin_diff', 'lhsmdu_diff', 'ppmt_diff']
models_corr = []
for col in tqdm_notebook (list_vals, total = 5, desc = 'Loop 1'):
    log_pairs = np.log (e_vals_depen [[col, 'reals']])
    models_corr.append (ols (col + '~ reals', data = log_pairs).fit ())
    time.sleep (0.000000001)
models_corr [3].summary ()
# [log(a), b] pairs read off each fitted model's summary, one per sampler.
mcs_fit = [1.5776, -0.5040]
clhs_fit = [1.2439, -0.4923]
maxi_fit = [1.2909, -0.5106]
lhsmdu_fit = [1.1186, -0.4659]
ppmt = [1.0317, -0.4978]
param_power_corr = [mcs_fit, clhs_fit, maxi_fit, lhsmdu_fit, ppmt]
columns = ['mcs_fit', 'clhs_fit', 'maxi_fit', 'lhsmdu_fit', 'ppmt']
# Evaluate the fitted power law a * reals**b and insert one column per sampler
# (columns 6 through 10 of the table).
for i, j, k in tqdm_notebook (zip (range (5), columns, range (6, 11)), total = 5, desc = 'Loop 1'):
    fitted = math.exp (param_power_corr [i][0]) * e_vals_depen [['reals']] ** (param_power_corr [i][1])
    e_vals_depen.insert (loc = k, value = fitted, column = j)
    time.sleep (0.000000001)
e_vals_depen.head ()
mpl.style.use('default')
fig, ax = plt.subplots(figsize = (6, 3))
# (scatter column, fitted-line column, color, marker, legend label) per sampler,
# plotted in the original order so legend and z-order are unchanged.
series_specs = [
    ('mcs_diff', 'mcs_fit', 'black', 'o', 'MCS'),
    ('clhs_diff', 'clhs_fit', 'blue', 'D', 'CLHS'),
    ('maximin_diff', 'maxi_fit', 'green', 's', 'Maximin LHS'),
    ('lhsmdu_diff', 'lhsmdu_fit', 'red', '*', 'LHSMDU'),
    ('ppmt_diff', 'ppmt', 'orange', '^', 'PPMT'),
]
for diff_col, fit_col, color, marker, label in series_specs:
    ax.scatter (e_vals_depen ['reals'], e_vals_depen [diff_col], s = 15,
                color = color, marker = marker, label = label)
    ax.plot (e_vals_depen ['reals'], e_vals_depen [fit_col], color = color,
             linestyle = 'dashed', lw = 0.8)
ax.legend (loc = 1, frameon = True)
ax.set_xlim (10, 10000)
ax.set_ylim (0, 1.6)
#plt.grid (linestyle='--', linewidth=0.5)
ax.set_xlabel ('Number of realizations')
ax.set_ylabel ('Mean of e values')
ax.set_xscale('log')
ax.grid (color = 'gray', linestyle = '--', linewidth = 0.6, which = 'both', alpha = 0.65)
plt.savefig (outdir + 'figures/figure_8.png', bbox_inches = 'tight', dpi = 300)
plt.show ()
def _sample_ecdfs (reals, n_outer):
    """Per-realization ECDFs for one sampler.

    `reals[i][j]` is the j-th OOIP realization at size index i; only the first
    `n_outer` size indices are processed (the original looped 20 for MCS/CLHS
    and 10 for the tiered samplers). Returns (x_lists, y_lists), each a list of
    20 sublists so downstream indexing is unchanged.
    """
    xs = [[] for _ in range (20)]
    ys = [[] for _ in range (20)]
    for i in tqdm_notebook (range (n_outer), total = n_outer, desc = 'Loop 1'):
        for j in range (100):
            x_vals, y_vals = ecdf (reals [i][j])
            xs [i].append (x_vals)
            ys [i].append (y_vals)
    return xs, ys

# The original repeated this collection loop five times verbatim.
x_samp_mcs_core, y_samp_mcs_core = _sample_ecdfs (g_reals_corr, 20)
x_samp_clhs_core, y_samp_clhs_core = _sample_ecdfs (g_reals_clhs_corr, 20)
x_samp_maxi_core, y_samp_maxi_core = _sample_ecdfs (g_reals_maxi_corr, 10)
x_samp_mdu_core, y_samp_mdu_core = _sample_ecdfs (g_reals_mdulsh_corr, 10)
x_samp_ppmt_core, y_samp_ppmt_core = _sample_ecdfs (g_reals_ppmt_samp_corr, 10)
mpl.style.use('default')
fig, axes = plt.subplots(2, 3, figsize = (16, 9))
# One spec per panel: (row, col), sampler ECDF x-lists, second index used into
# them, and title. FIX: the original used plt.xlim/plt.ylim for every panel
# after [0, 0]; those calls act on the *current* axes (the last subplot, which
# is deleted below), so the panels were never actually clipped — limits are
# now set on each panel's own axes, matching the [0, 0] treatment.
panel_specs = [
    ((0, 0), x_samp_mcs_core, 0, 'MCS'),
    ((0, 1), x_samp_clhs_core, 1, 'CLHS'),
    ((0, 2), x_samp_maxi_core, 2, 'Maximin LHS'),
    ((1, 0), x_samp_mdu_core, 3, 'LHSMDU'),
    ((1, 1), x_samp_ppmt_core, 4, 'PPMT'),
]
for (r, c), samples, real_idx, title in panel_specs:
    ax = axes [r, c]
    ax.plot (x_popul_core, y_popul_core, linestyle = '-', linewidth = 2,
             color = 'black', label = 'Truth')
    # Overlay the ECDFs of the first seven realization sizes (10 ... 70 reals).
    for s, lab in enumerate (['10', '20', '30', '40', '50', '60', '70']):
        sns.ecdfplot (samples [s][real_idx], ax = ax, linewidth = 1,
                      alpha = 0.65, label = lab)
    ax.legend (frameon = True, loc = 4)
    ax.set (xlabel = 'OOIP', ylabel = 'F(OOIP)')
    ax.set_title (title)
    ax.set_xlim (0, 16)
    ax.set_ylim (0, 1)
    ax.set_yticks (np.arange(0,1.1,0.1))
    ax.set_xticks (np.arange(0,21,2))
    ax.grid (color = 'gray', linestyle = '--', linewidth = 0.5)
fig.delaxes (axes[1][2])
plt.subplots_adjust (left = 0.125, bottom = 0.1, right = 0.75, top = 0.9, wspace = 0.35, hspace = 0.45)
plt.savefig (outdir + 'figures/figure_9.png', bbox_inches = 'tight', dpi = 300)
plt.show ()
The following code cleans up the output directory. To run it, uncomment the lines below (select them and press Ctrl+/).
# figures = []
# for i in range (9):
# i = i + 1
# z = './output/figures/figure_{}.png'.format (i)
# figures.append (z)
# list_of_reals = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 200, 300, 400, 500, 1000, 3000, 5000,
# 7000, 9000, 10000]
# lhsmdus = []
# for i in range (100):
# i = i + 1
# for j in list_of_reals:
# k = './output/lhsmdu/lhsmdu_{}_{}.out'.format (i, j)
# lhsmdus.append (k)
# mcs_ppmt = []
# for i in range (100):
# i = i + 1
# for j in list_of_reals:
# k = './output/ppmt/datafl_{}_{}.out'.format (i, j)
# mcs_ppmt.append (k)
# ppmt = []
# for i in range (100):
# i = i + 1
# for j in list_of_reals:
# k = './output/ppmt/ppmt_1/ppmt_{}_{}.out'.format (i, j)
# ppmt.append (k)
# files = [figures, lhsmdus, mcs_ppmt, ppmt]
# gs.rmfile (['temp', 'nofile', 'nofilr'])
# for i in files:
# gs.rmfile (i)